Task 1: Data Preprocessing¶

In [ ]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

import time
In [ ]:
# Mount drive folder into Colab filesystem
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
#IMPORTANT works only with absolute paths
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data/dataset_lab_1.csv')
new_df = df.copy()
df
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port Label
0 303376 3.033760e+04 0 749.4 1448 12743.263 0 0 19.833334 98776.15 322.16666 119 6 36.258636 6 443 Benign
1 117 1.170000e+02 0 0.0 0 0.000 1 0 0.000000 0.00 0.00000 0 1 17094.018000 1 52631 Benign
2 142 1.420000e+02 0 0.0 0 84507.040 0 0 6.000000 0.00 6.00000 6 2 14084.507000 2 80 Benign
3 191 6.366667e+01 0 52.0 52 921465.940 1 0 36.000000 0.00 42.40000 36 2 20942.408000 2 53 Benign
4 4 4.000000e+00 0 0.0 0 3000000.000 0 0 6.000000 0.00 6.00000 6 2 500000.000000 2 49467 Benign
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
31502 5710955 1.903652e+06 0 0.0 0 0.000 0 0 0.000000 4037277.80 0.00000 0 3 0.700408 3 80 Brute Force
31503 5862561 1.954187e+06 0 0.0 0 0.000 0 0 0.000000 4144374.80 0.00000 0 3 0.682296 3 80 Brute Force
31504 5854872 1.951624e+06 0 0.0 0 0.000 0 0 0.000000 4139029.80 0.00000 0 3 0.683192 3 80 Brute Force
31505 5555547 1.851849e+06 0 0.0 0 0.000 0 0 0.000000 3927356.50 0.00000 0 3 0.720001 3 80 Brute Force
31506 5018204 1.672735e+06 0 0.0 0 0.000 0 0 0.000000 3547404.80 0.00000 0 3 0.797098 3 80 Brute Force

31507 rows × 17 columns

In [ ]:
df.describe()
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port
count 3.150700e+04 3.150700e+04 31507.000000 31507.000000 31507.000000 3.148700e+04 31507.000000 31507.000000 31507.000000 3.150700e+04 31507.000000 31507.000000 31507.000000 3.150700e+04 31507.000000 31507.000000
mean 1.647581e+07 1.438258e+06 0.038277 310.232250 896.532040 inf 0.639382 0.038277 44.150959 4.653801e+06 163.155706 170.496652 6.181293 inf 6.181293 7367.544292
std 3.526316e+07 4.122202e+06 0.191868 605.892466 1945.454241 NaN 0.531334 0.191868 115.215481 1.185644e+07 290.189858 501.331438 109.450416 NaN 109.450416 17104.518207
min -1.000000e+00 -1.000000e+00 0.000000 0.000000 0.000000 -1.200000e+07 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 1.000000 -2.000000e+06 1.000000 0.000000
25% 6.500000e+01 5.500000e+01 0.000000 5.500000 6.000000 1.199456e+02 0.000000 0.000000 2.000000 0.000000e+00 3.333333 2.000000 1.000000 2.032897e+00 1.000000 53.000000
50% 2.427100e+04 8.096333e+03 0.000000 51.000000 56.000000 4.830598e+03 1.000000 0.000000 32.000000 0.000000e+00 47.800000 35.000000 2.000000 1.581044e+02 2.000000 80.000000
75% 5.216842e+06 5.558727e+05 0.000000 176.000000 271.000000 1.538462e+05 1.000000 0.000000 50.000000 7.627202e+05 112.000000 198.000000 5.000000 3.174603e+04 5.000000 1066.000000
max 1.199985e+08 1.140000e+08 1.000000 3865.000000 11595.000000 inf 10.000000 1.000000 3431.806400 8.280000e+07 1903.272700 23360.000000 18605.000000 inf 18605.000000 65533.000000
In [ ]:
print(df.shape)
# Remove duplicates
df = df.drop_duplicates()
print(df.shape)
# Remove rows with missing values
df = df.dropna()
print(df.shape)
# remove rows with inf
df = df[~df.isin([np.inf, -np.inf]).any(axis=1)]
print(df.shape)
(31507, 17)
(29396, 17)
(29393, 17)
(29386, 17)
In [ ]:
 df.describe()
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port
count 2.938600e+04 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000
mean 1.766497e+07 1.542059e+06 0.039372 331.761468 960.378752 1.440137e+06 0.665521 0.039372 46.788888 4.989700e+06 174.196328 182.228204 6.487375 4.883930e+04 6.487375 7834.468454
std 3.622483e+07 4.249589e+06 0.194483 621.806613 1999.336617 2.553663e+07 0.527011 0.194483 118.804856 1.220843e+07 297.400545 517.120979 113.325266 1.949747e+05 113.325266 17570.251858
min -1.000000e+00 -1.000000e+00 0.000000 0.000000 0.000000 -1.200000e+07 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 1.000000 -2.000000e+06 1.000000 0.000000
25% 8.400000e+01 6.133333e+01 0.000000 6.000000 6.000000 1.212884e+02 0.000000 0.000000 2.000000 0.000000e+00 3.333333 2.000000 1.000000 1.278269e+00 1.000000 53.000000
50% 3.111650e+04 1.113090e+04 0.000000 63.000000 69.000000 5.129580e+03 1.000000 0.000000 34.000000 0.000000e+00 52.333332 37.000000 2.000000 1.146405e+02 2.000000 80.000000
75% 5.489763e+06 9.056584e+05 0.000000 202.482143 426.750000 1.481481e+05 1.000000 0.000000 51.000000 1.959326e+06 128.284720 265.000000 5.000000 2.666667e+04 5.000000 1216.000000
max 1.199985e+08 1.140000e+08 1.000000 3865.000000 11595.000000 2.070000e+09 10.000000 1.000000 3431.806400 8.280000e+07 1903.272700 23360.000000 18605.000000 3.000000e+06 18605.000000 65533.000000
In [ ]:
#Check for class imbalance
class_frequencies = df['Label'].value_counts()
print(class_frequencies)

plt.figure(figsize=(8, 6))
plt.bar(class_frequencies.index, class_frequencies.values)
plt.title('Class Distribution')
plt.xlabel('Classes')
plt.ylabel('Frequency')
plt.show()

df.describe()
Label
Benign         19242
PortScan        4849
DoS Hulk        3868
Brute Force     1427
Name: count, dtype: int64
No description has been provided for this image
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port
count 2.938600e+04 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000 29386.000000 2.938600e+04 29386.000000 29386.000000
mean 1.766497e+07 1.542059e+06 0.039372 331.761468 960.378752 1.440137e+06 0.665521 0.039372 46.788888 4.989700e+06 174.196328 182.228204 6.487375 4.883930e+04 6.487375 7834.468454
std 3.622483e+07 4.249589e+06 0.194483 621.806613 1999.336617 2.553663e+07 0.527011 0.194483 118.804856 1.220843e+07 297.400545 517.120979 113.325266 1.949747e+05 113.325266 17570.251858
min -1.000000e+00 -1.000000e+00 0.000000 0.000000 0.000000 -1.200000e+07 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 1.000000 -2.000000e+06 1.000000 0.000000
25% 8.400000e+01 6.133333e+01 0.000000 6.000000 6.000000 1.212884e+02 0.000000 0.000000 2.000000 0.000000e+00 3.333333 2.000000 1.000000 1.278269e+00 1.000000 53.000000
50% 3.111650e+04 1.113090e+04 0.000000 63.000000 69.000000 5.129580e+03 1.000000 0.000000 34.000000 0.000000e+00 52.333332 37.000000 2.000000 1.146405e+02 2.000000 80.000000
75% 5.489763e+06 9.056584e+05 0.000000 202.482143 426.750000 1.481481e+05 1.000000 0.000000 51.000000 1.959326e+06 128.284720 265.000000 5.000000 2.666667e+04 5.000000 1216.000000
max 1.199985e+08 1.140000e+08 1.000000 3865.000000 11595.000000 2.070000e+09 10.000000 1.000000 3431.806400 8.280000e+07 1903.272700 23360.000000 18605.000000 3.000000e+06 18605.000000 65533.000000
In [ ]:
unique_labels = np.unique(df['Label'])
print(unique_labels)
# Encode labels
label_encoder = LabelEncoder()
df['Label'] = label_encoder.fit_transform(df['Label'])

# Get the mapping of original labels to encoded labels
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

# Print the mapping
for original_label, encoded_label in label_mapping.items():
    print(f"Original Label: {original_label}, Encoded Label: {encoded_label}")

df.head()
['Benign' 'Brute Force' 'DoS Hulk' 'PortScan']
Original Label: Benign, Encoded Label: 0
Original Label: Brute Force, Encoded Label: 1
Original Label: DoS Hulk, Encoded Label: 2
Original Label: PortScan, Encoded Label: 3
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port Label
0 303376 30337.600000 0 749.4 1448 12743.263 0 0 19.833334 98776.15 322.16666 119 6 36.258636 6 443 0
1 117 117.000000 0 0.0 0 0.000 1 0 0.000000 0.00 0.00000 0 1 17094.018000 1 52631 0
2 142 142.000000 0 0.0 0 84507.040 0 0 6.000000 0.00 6.00000 6 2 14084.507000 2 80 0
3 191 63.666668 0 52.0 52 921465.940 1 0 36.000000 0.00 42.40000 36 2 20942.408000 2 53 0
4 4 4.000000 0 0.0 0 3000000.000 0 0 6.000000 0.00 6.00000 6 2 500000.000000 2 49467 0
In [ ]:
# Split the dataset
X = df.drop('Label', axis=1)
y = df['Label'].values

#default stratify
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

#               ******* REPLACEMENT PORT PART**********

X_test_future = X_test.copy()
y_test_future = y_test.copy()

# Check if change went well
print("Before modification:")
print(X_test_future[X_test_future["Destination Port"] == 80].shape)
print(X_test_future[X_test_future["Destination Port"] == 8080].shape)

# Modify the test set: Change Port 80 -> 8080 for Brute Force attacks
# NB: Label "Brute force" is associated to 1! substituted w/ dynamic mapping
brute_force_indices = (y_test_future == label_mapping['Brute Force']) & (X_test_future["Destination Port"] == 80)
X_test_future.loc[brute_force_indices, "Destination Port"] = 8080

print("After modification:")
print(X_test_future[X_test_future["Destination Port"] == 80].shape)
print(X_test_future[X_test_future["Destination Port"] == 8080].shape)
Before modification:
(1478, 16)
(2, 16)
After modification:
(1203, 16)
(277, 16)
In [ ]:
df
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port Label
0 303376 3.033760e+04 0 749.4 1448 12743.263 0 0 19.833334 98776.15 322.16666 119 6 36.258636 6 443 0
1 117 1.170000e+02 0 0.0 0 0.000 1 0 0.000000 0.00 0.00000 0 1 17094.018000 1 52631 0
2 142 1.420000e+02 0 0.0 0 84507.040 0 0 6.000000 0.00 6.00000 6 2 14084.507000 2 80 0
3 191 6.366667e+01 0 52.0 52 921465.940 1 0 36.000000 0.00 42.40000 36 2 20942.408000 2 53 0
4 4 4.000000e+00 0 0.0 0 3000000.000 0 0 6.000000 0.00 6.00000 6 2 500000.000000 2 49467 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
31502 5710955 1.903652e+06 0 0.0 0 0.000 0 0 0.000000 4037277.80 0.00000 0 3 0.700408 3 80 1
31503 5862561 1.954187e+06 0 0.0 0 0.000 0 0 0.000000 4144374.80 0.00000 0 3 0.682296 3 80 1
31504 5854872 1.951624e+06 0 0.0 0 0.000 0 0 0.000000 4139029.80 0.00000 0 3 0.683192 3 80 1
31505 5555547 1.851849e+06 0 0.0 0 0.000 0 0 0.000000 3927356.50 0.00000 0 3 0.720001 3 80 1
31506 5018204 1.672735e+06 0 0.0 0 0.000 0 0 0.000000 3547404.80 0.00000 0 3 0.797098 3 80 1

29386 rows × 17 columns

In [ ]:
X_train.describe()
Out[ ]:
Flow Duration Flow IAT Mean Fwd PSH Flags Bwd Packet Length Mean Bwd Packet Length Max Flow Bytes/s Down/Up Ratio SYN Flag Count Fwd Packet Length Mean Fwd IAT Std Packet Length Mean Fwd Packet Length Max Subflow Fwd Packets Flow Packets/s Total Fwd Packets Destination Port
count 1.763100e+04 1.763100e+04 17631.000000 17631.000000 17631.000000 1.763100e+04 17631.000000 17631.000000 17631.000000 1.763100e+04 17631.000000 17631.000000 17631.000000 1.763100e+04 17631.000000 17631.000000
mean 1.761041e+07 1.539809e+06 0.039079 327.796696 948.627758 1.155893e+06 0.665759 0.039079 47.418032 4.935995e+06 172.768614 184.258692 6.791674 4.845513e+04 6.791674 7793.989677
std 3.616016e+07 4.220290e+06 0.193788 618.289272 1988.594839 1.870955e+07 0.525327 0.193788 124.756126 1.214170e+07 296.561790 544.983752 141.760906 1.914530e+05 141.760906 17494.211035
min -1.000000e+00 -1.000000e+00 0.000000 0.000000 0.000000 -1.200000e+07 0.000000 0.000000 0.000000 0.000000e+00 0.000000 0.000000 1.000000 -2.000000e+06 1.000000 0.000000
25% 8.150000e+01 6.100000e+01 0.000000 6.000000 6.000000 1.211933e+02 0.000000 0.000000 2.000000 0.000000e+00 3.333333 2.000000 1.000000 1.276319e+00 1.000000 53.000000
50% 3.116400e+04 1.134850e+04 0.000000 61.000000 68.000000 4.959466e+03 1.000000 0.000000 34.000000 0.000000e+00 51.800000 37.000000 2.000000 1.115480e+02 2.000000 80.000000
75% 5.508626e+06 9.155924e+05 0.000000 198.000000 384.000000 1.454545e+05 1.000000 0.000000 51.000000 1.961929e+06 126.400000 242.000000 5.000000 2.702703e+04 5.000000 1218.000000
max 1.199985e+08 6.410000e+07 1.000000 3865.000000 11595.000000 1.040000e+09 5.000000 1.000000 3431.806400 8.280000e+07 1903.272700 23360.000000 18605.000000 3.000000e+06 18605.000000 65533.000000
In [ ]:
# Standardize the features
scaler = StandardScaler() #The features have gaussian distribution
X_train = scaler.fit_transform(X_train) #scaler fitted on training set only
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_test_future = scaler.transform(X_test_future)  # Standardize the copied test set as well

Task 2: Shallow Neural Network¶

In [ ]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
X_test_future_tensor = torch.tensor(X_test_future, dtype=torch.float32)
y_test_future_tensor = torch.tensor(y_test_future, dtype=torch.long)
In [ ]:
# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_future_dataset = TensorDataset(X_test_future_tensor, y_test_future_tensor)


train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
test_future_loader = DataLoader(test_future_dataset, batch_size=64, shuffle=False)
In [ ]:
#Define the device to use for training and validation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
cuda
In [ ]:
import torch.nn as nn

class SimpleLinearModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleLinearModel, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)  # Primo layer
        self.linear2 = nn.Linear(hidden_size, output_size) # Secondo layer

    def forward(self, x):
        x = self.linear1(x)  # Passa l'input al primo layer
        x = self.linear2(x)  # Passa l'output del primo layer al secondo
        return x
In [ ]:
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay, precision_score, f1_score, recall_score
from sklearn.preprocessing import StandardScaler
from scipy.io.arff import loadarff
import matplotlib.pyplot as plt
import seaborn as sns

# Function to visualize model performance by creating a confusion matrix
def plot_confusion_matrix(y_true, y_pred, class_names):

    cm = confusion_matrix(y_true, y_pred)  # Compute confusion matrix

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)

    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()


def training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion):
    """
    Train the model for a specified number of epochs and track the training and validation losses.
    """
    start_time = time.time()
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        train_loss = 0

        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)  # Forward pass
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * batch_X.size(0)

        train_loss /= len(train_dataset)
        train_losses.append(train_loss)

        # Validation
        model.eval()  # Set model to evaluation mode
        val_loss = 0
        with torch.no_grad():  # Disable gradient computation for validation
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                val_loss += loss.item() * batch_X.size(0)

        val_loss /= len(val_dataset)
        val_losses.append(val_loss)

        if (epoch + 1) % 20 == 0:  # Print every 20 epochs
            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}')

    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f'Training took {elapsed_time:.4f} seconds to execute.')

    # Plot losses
    plt.figure(figsize=(10, 5))
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()
    plt.show()

    return

def testing_model(model, dataloader, device):

    # Record the start time
    start_time = time.time()

    model.eval()  # Set the model to evaluation mode
    all_labels = []
    all_predictions = []

    with torch.no_grad():  # Disable gradient computation for efficiency
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)  # Move batch to GPU
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)  # Get the class with highest score
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    class_names = ["Benign", "Brute Force", "DoS Hulk", "PortScan"]
    # Plot the confusion matrix
    plot_confusion_matrix(all_labels, all_predictions, class_names)

    accuracy = accuracy_score(all_labels, all_predictions) * 100
    precision = precision_score(all_labels, all_predictions,average='macro', zero_division=0) * 100
    f1 = f1_score(all_labels, all_predictions,average='macro', zero_division=0) * 100
    recall = recall_score(all_labels, all_predictions,average='macro', zero_division=0) * 100
    print(f'F1-score: {f1:.4f}')
    print(f'Recall: {recall:.4f}')
    # Record the end time
    end_time = time.time()
    # Calculate the elapsed time
    elapsed_time = end_time - start_time
    #print(f'The function took {elapsed_time:.4f} seconds to execute.')

    return accuracy,precision,f1

From now on we will try three different models, each with different hidden_size (neurons per layer)

In [ ]:
criterion = nn.CrossEntropyLoss()
# Define training parameters
num_epochs = 100
In [ ]:
#32 Neurons
import torch.optim as optim
linearModel = SimpleLinearModel(input_size=16, hidden_size=32, output_size=4).to(device)
optimizer = optim.AdamW(linearModel.parameters(), lr=0.0005)
training_loop(linearModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
Epoch 20/100, Train Loss: 0.3511, Val Loss: 0.3240
Epoch 40/100, Train Loss: 0.3331, Val Loss: 0.3075
Epoch 60/100, Train Loss: 0.3250, Val Loss: 0.3006
Epoch 80/100, Train Loss: 0.3197, Val Loss: 0.2983
Epoch 100/100, Train Loss: 0.3153, Val Loss: 0.2930
Training took 52.9871 seconds to execute.
No description has been provided for this image
In [ ]:
model=linearModel
print("Train --------------------------------------")
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
print("Validation --------------------------------------")
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
print("Test --------------------------------------")
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Train --------------------------------------
No description has been provided for this image
F1-score: 67.6426
Recall: 67.5987
Train Accuracy: 88.6960
Train Precision: 68.0087
Validation --------------------------------------
No description has been provided for this image
F1-score: 67.9871
Recall: 67.9766
Validation Accuracy: 89.1952
Validation Precision: 68.2999
Test --------------------------------------
No description has been provided for this image
F1-score: 68.1387
Recall: 68.1335
Test Accuracy: 89.2140
Test Precision: 68.3944
In [ ]:
#64 Neurons

criterion = nn.CrossEntropyLoss()
# Define training parameters
num_epochs = 100

linearModel = SimpleLinearModel(input_size=16, hidden_size=64, output_size=4).to(device)
optimizer = optim.AdamW(linearModel.parameters(), lr=0.0005)
training_loop(linearModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)

model=linearModel
print("Train --------------------------------------")
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
print("Validation --------------------------------------")
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
print("Test --------------------------------------")
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/100, Train Loss: 0.3448, Val Loss: 0.3176
Epoch 40/100, Train Loss: 0.3295, Val Loss: 0.3035
Epoch 60/100, Train Loss: 0.3219, Val Loss: 0.2975
Epoch 80/100, Train Loss: 0.3186, Val Loss: 0.2946
Epoch 100/100, Train Loss: 0.3128, Val Loss: 0.2902
Training took 55.4442 seconds to execute.
No description has been provided for this image
Train --------------------------------------
No description has been provided for this image
F1-score: 67.5021
Recall: 67.8811
Train Accuracy: 88.6790
Train Precision: 67.4977
Validation --------------------------------------
No description has been provided for this image
F1-score: 67.8369
Recall: 68.3050
Validation Accuracy: 89.1101
Validation Precision: 67.7317
Test --------------------------------------
No description has been provided for this image
F1-score: 68.1955
Recall: 68.5614
Test Accuracy: 89.3841
Test Precision: 68.1114
In [ ]:
#128 Neurons

criterion = nn.CrossEntropyLoss()
# Define training parameters
num_epochs = 100

linearModel = SimpleLinearModel(input_size=16, hidden_size=128, output_size=4).to(device)
optimizer = optim.AdamW(linearModel.parameters(), lr=0.0005)
training_loop(linearModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)

model=linearModel
print("Train --------------------------------------")
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
print("Validation --------------------------------------")
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
print("Test --------------------------------------")
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/100, Train Loss: 0.3391, Val Loss: 0.3094
Epoch 40/100, Train Loss: 0.3251, Val Loss: 0.2984
Epoch 60/100, Train Loss: 0.3169, Val Loss: 0.2944
Epoch 80/100, Train Loss: 0.3133, Val Loss: 0.2957
Epoch 100/100, Train Loss: 0.3138, Val Loss: 0.2898
Training took 54.7817 seconds to execute.
No description has been provided for this image
Train --------------------------------------
No description has been provided for this image
F1-score: 68.1157
Recall: 67.7666
Train Accuracy: 89.2632
Train Precision: 68.7910
Validation --------------------------------------
No description has been provided for this image
F1-score: 68.5877
Recall: 68.1398
Validation Accuracy: 89.8928
Validation Precision: 69.3453
Test --------------------------------------
No description has been provided for this image
F1-score: 68.4369
Recall: 68.1176
Test Accuracy: 89.6053
Test Precision: 69.0063

Change the model with the best performance (optimal number of neurons) by changing the activation function in ReLU and evaluate the effects.

In [ ]:
class SimpleReluModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(SimpleReluModel, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)      # ReLU activation function
        x = self.linear2(x)
        return x
In [ ]:
criterion = nn.CrossEntropyLoss()
# Define training parameters
num_epochs = 100

reluModel = SimpleReluModel(input_size=16, hidden_size=128, output_size=4).to(device)
optimizer = optim.AdamW(reluModel.parameters(), lr=0.0005)
training_loop(reluModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)

model=reluModel
print("Train --------------------------------------")
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
print("Validation --------------------------------------")
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
print("Test --------------------------------------")
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/100, Train Loss: 0.1607, Val Loss: 0.1433
Epoch 40/100, Train Loss: 0.1391, Val Loss: 0.1277
Epoch 60/100, Train Loss: 0.1288, Val Loss: 0.1190
Epoch 80/100, Train Loss: 0.1218, Val Loss: 0.1180
Epoch 100/100, Train Loss: 0.1160, Val Loss: 0.1123
Training took 57.2909 seconds to execute.
No description has been provided for this image
Train --------------------------------------
No description has been provided for this image
F1-score: 92.7681
Recall: 93.3803
Train Accuracy: 95.3037
Train Precision: 92.5439
Validation --------------------------------------
No description has been provided for this image
F1-score: 93.3765
Recall: 93.7646
Validation Accuracy: 95.6781
Validation Precision: 93.3060
Test --------------------------------------
No description has been provided for this image
F1-score: 93.8198
Recall: 94.7813
Test Accuracy: 96.0020
Test Precision: 93.2792
In [ ]:
X_test_future
Out[ ]:
array([[-0.48612106, -0.36099551,  4.95875587, ..., -0.25261941,
        -0.03380205,  2.37731154],
       [-0.48702368, -0.36485602, -0.20166349, ..., -0.05964087,
        -0.04085638,  2.07537453],
       [-0.48702083, -0.36485641, -0.20166349, ..., -0.12001944,
        -0.03380205, -0.44250119],
       ...,
       [-0.48702312, -0.36485128, -0.20166349, ..., -0.11192678,
        -0.04085638, -0.1024362 ],
       [-0.48517319, -0.35957945, -0.20166349, ..., -0.25278674,
        -0.03380205, -0.44250119],
       [ 1.52089177,  5.36985501, -0.20166349, ..., -0.25309845,
        -0.03380205, -0.44250119]])

Task 3: The impact of specific features¶

In [ ]:
#test_future_loader contains test dataset w/ 80->8080 swap for bruteforce attacks
model=reluModel
print("Train --------------------------------------")
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
print("Validation --------------------------------------")
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
print("Test --------------------------------------")
test_accuracy, test_prec, _ = testing_model(model,test_future_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Train --------------------------------------
No description has been provided for this image
F1-score: 92.7681
Recall: 93.3803
Train Accuracy: 95.3037
Train Precision: 92.5439
Validation --------------------------------------
No description has been provided for this image
F1-score: 93.3765
Recall: 93.7646
Validation Accuracy: 95.6781
Validation Precision: 93.3060
Test --------------------------------------
No description has been provided for this image
F1-score: 72.8480
Recall: 71.7813
Test Accuracy: 91.6979
Test Precision: 75.8571

Remove the feature port from the original dataset

In [ ]:
# Drop the destination port
new_df = new_df.drop('Destination Port', axis=1)

print("Number of PortScan before pre-processing:", (new_df["Label"] == "PortScan").sum())


# Remove duplicates
new_df = new_df.drop_duplicates()
# Remove rows with missing values
new_df = new_df.dropna()
# remove rows with inf
new_df = new_df[~new_df.isin([np.inf, -np.inf]).any(axis=1)]

print("Number of PortScan after pre-processing: ",(new_df["Label"] == "PortScan").sum())


#Check for class imbalance. Just plot a graph that counts the number of samples for each class. We can see that is imbalance since there are far more benign samples
class_frequencies = new_df['Label'].value_counts()
print(class_frequencies)

plt.figure(figsize=(8, 6))
plt.bar(class_frequencies.index, class_frequencies.values)
plt.title('Class Distribution')
plt.xlabel('Classes')
plt.ylabel('Frequency')
plt.show()

# Encode labels
label_encoder = LabelEncoder()
new_df['Label'] = label_encoder.fit_transform(new_df['Label'])
Number of PortScan before pre-processing: 5000
Number of PortScan after pre-processing:  285
Label
Benign         16889
DoS Hulk        3868
Brute Force     1427
PortScan         285
Name: count, dtype: int64
No description has been provided for this image
In [ ]:
# Split the dataset

X = new_df.drop('Label', axis=1)
y = new_df['Label'].values

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=29)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=29)
In [ ]:
unique_values, value_counts = np.unique(y, return_counts=True)

# Stampa i conteggi dei valori
for value, count in zip(unique_values, value_counts):
    print(f"Valore: {value}, Conteggio: {count}")
Valore: 0, Conteggio: 16889
Valore: 1, Conteggio: 1427
Valore: 2, Conteggio: 3868
Valore: 3, Conteggio: 285
In [ ]:
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
In [ ]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
In [ ]:
# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
In [ ]:
criterion = nn.CrossEntropyLoss()
# Define training parameters
num_epochs = 100
reluModel = SimpleReluModel(input_size=15, hidden_size=128, output_size=4).to(device)
optimizer = optim.AdamW(reluModel.parameters(), lr=0.0005)
training_loop(reluModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
Epoch 20/100, Train Loss: 0.1486, Val Loss: 0.1626
Epoch 40/100, Train Loss: 0.1246, Val Loss: 0.1436
Epoch 60/100, Train Loss: 0.1144, Val Loss: 0.1365
Epoch 80/100, Train Loss: 0.1091, Val Loss: 0.1324
Epoch 100/100, Train Loss: 0.1051, Val Loss: 0.1300
Training took 43.7989 seconds to execute.
No description has been provided for this image
In [ ]:
model=reluModel
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
No description has been provided for this image
F1-score: 84.3470
Recall: 86.8686
Train Accuracy: 95.7792
Train Precision: 82.4397
No description has been provided for this image
F1-score: 82.7014
Recall: 85.5704
Validation Accuracy: 95.2826
Validation Precision: 80.5669
No description has been provided for this image
F1-score: 82.4631
Recall: 84.1222
Test Accuracy: 95.2381
Test Precision: 81.1919
No description has been provided for this image
F1-score: 84.3470
Recall: 86.8686
Train Accuracy: 95.7792
Train Precision: 82.4397
No description has been provided for this image
F1-score: 82.7014
Recall: 85.5704
Validation Accuracy: 95.2826
Validation Precision: 80.5669
No description has been provided for this image
F1-score: 82.4631
Recall: 84.1222
Test Accuracy: 95.2381
Test Precision: 81.1919

Use weighted loss

In [ ]:
from sklearn.utils.class_weight import compute_class_weight

# Calculate class weights using 'balanced' mode
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)

# Convert to a PyTorch tensor
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)
In [ ]:
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
# Define training parameters
num_epochs = 100
reluModel = SimpleReluModel(input_size=15, hidden_size=128, output_size=4).to(device)
optimizer = optim.AdamW(reluModel.parameters(), lr=0.0005)
training_loop(reluModel, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
Epoch 20/100, Train Loss: 0.2409, Val Loss: 0.3291
Epoch 40/100, Train Loss: 0.1999, Val Loss: 0.3140
Epoch 60/100, Train Loss: 0.1808, Val Loss: 0.3268
Epoch 80/100, Train Loss: 0.1733, Val Loss: 0.3363
Epoch 100/100, Train Loss: 0.1654, Val Loss: 0.3498
Training took 44.5534 seconds to execute.
No description has been provided for this image
In [ ]:
model=reluModel
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
No description has been provided for this image
F1-score: 78.7906
Recall: 94.9750
Train Accuracy: 92.9530
Train Precision: 74.1787
No description has been provided for this image
F1-score: 76.1936
Recall: 93.0487
Validation Accuracy: 91.8558
Validation Precision: 71.8612
No description has been provided for this image
F1-score: 78.5373
Recall: 92.8087
Test Accuracy: 92.4121
Test Precision: 73.9810

Task 4: Deep Neural Network¶

In [ ]:
class DeepFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size):
        super(DeepFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))
        layers.append(nn.ReLU())

        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
            layers.append(nn.ReLU())


        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)
In [ ]:
#Define the device to use for training and validation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
cuda
In [ ]:
criterion = nn.CrossEntropyLoss()
In [ ]:
import random

layer_sizes_possible = [2, 4, 8, 16, 32]
no_candidates = 30
candidates = []

for _ in range(no_candidates):
    # pick number of layers between 2 and 5
    layer_number = random.randint(2, 5)
    # randomly choose neurons for each layer
    layer_sizes = [random.choice(layer_sizes_possible) for _ in range(layer_number)]
    candidates.append(layer_sizes)
    print(layer_sizes)

# Example output:
# [16, 32]
# [8, 16, 32, 16]
# [32, 32, 8, 16, 4]
[4, 2, 16]
[4, 32, 16, 16, 2]
[2, 8, 4, 16]
[8, 4, 2]
[32, 4, 2, 8, 8]
[32, 16, 32]
[4, 4, 16]
[32, 4]
[4, 8, 8, 4]
[8, 32, 32, 8, 2]
[2, 4, 8, 8, 32]
[8, 16, 2, 32, 8]
[8, 16, 2, 2]
[16, 8]
[2, 2, 8, 8]
[2, 2, 32]
[8, 4, 4]
[8, 16, 2, 2, 2]
[2, 16, 32, 4, 32]
[2, 16, 32, 32]
[32, 32]
[16, 16]
[32, 16, 4, 16, 2]
[8, 2, 8, 8]
[4, 2]
[4, 4, 8, 4]
[32, 8, 8, 16]
[4, 16, 4, 8, 8]
[4, 4, 32, 2, 2]
[4, 4, 8, 2]
In [ ]:
num_epochs = 15 #low just to test
results = []

for i in range(no_candidates):
  performance = []
  layer_sizes = candidates[i]
  input_size = 15
  output_size = 4

  model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
  optimizer = optim.AdamW(model.parameters(), lr=0.0005)
  training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
  print("Neurons per layer:", layer_sizes)
  train_accuracy,train_prec, train_f1 = testing_model(model,train_loader,device)
  print(f'Train Accuracy: {train_accuracy:.4f}')
  print(f'Train Precision: {train_prec:.4f}')
  val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
  print(f'Validation Accuracy: {val_accuracy:.4f}')
  print(f'Validation Precision: {val_prec:.4f}')
  performance.append(layer_sizes)
  performance.append(train_accuracy)
  performance.append(train_prec)
  performance.append(train_f1)
  performance.append(val_accuracy)
  performance.append(val_prec)
  performance.append(val_f1)
  results.append(performance)
Training took 7.1567 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 2, 16]
No description has been provided for this image
F1-score: 68.3351
Recall: 85.7106
Train Accuracy: 84.2890
Train Precision: 66.6505
No description has been provided for this image
F1-score: 66.7716
Recall: 85.0999
Validation Accuracy: 83.4001
Validation Precision: 65.1009
Training took 9.0947 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 32, 16, 16, 2]
No description has been provided for this image
F1-score: 59.1754
Recall: 67.7871
Train Accuracy: 87.4416
Train Precision: 58.0324
No description has been provided for this image
F1-score: 58.4433
Recall: 67.7946
Validation Accuracy: 86.5821
Validation Precision: 57.4330
Training took 8.9984 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 8, 4, 16]
No description has been provided for this image
F1-score: 59.2568
Recall: 83.5613
Train Accuracy: 76.0107
Train Precision: 56.8752
No description has been provided for this image
F1-score: 57.8488
Recall: 82.2513
Validation Accuracy: 75.1001
Validation Precision: 56.1010
Training took 7.8435 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 4, 2]
No description has been provided for this image
F1-score: 46.3360
Recall: 46.6078
Train Accuracy: 89.5705
Train Precision: 46.3473
No description has been provided for this image
F1-score: 46.4956
Recall: 46.6953
Validation Accuracy: 90.1647
Validation Precision: 46.5752
Training took 8.7845 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 4, 2, 8, 8]
No description has been provided for this image
F1-score: 70.5272
Recall: 86.7461
Train Accuracy: 85.5352
Train Precision: 69.9555
No description has been provided for this image
F1-score: 69.3425
Recall: 86.6849
Validation Accuracy: 85.0245
Validation Precision: 68.6328
Training took 8.0487 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 16, 32]
No description has been provided for this image
F1-score: 79.2089
Recall: 92.7660
Train Accuracy: 91.5882
Train Precision: 72.6501
No description has been provided for this image
F1-score: 76.6676
Recall: 91.8378
Validation Accuracy: 90.3649
Validation Precision: 69.9297
Training took 7.6940 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 4, 16]
No description has been provided for this image
F1-score: 72.0091
Recall: 88.4398
Train Accuracy: 88.4949
Train Precision: 67.9285
No description has been provided for this image
F1-score: 70.2334
Recall: 87.3099
Validation Accuracy: 88.0062
Validation Precision: 66.5039
Training took 6.7497 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 4]
No description has been provided for this image
F1-score: 73.5368
Recall: 89.4798
Train Accuracy: 89.6521
Train Precision: 70.1438
No description has been provided for this image
F1-score: 72.1334
Recall: 89.5307
Validation Accuracy: 89.0966
Validation Precision: 68.5401
Training took 8.6078 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 8, 8, 4]
No description has been provided for this image
F1-score: 59.5529
Recall: 81.5069
Train Accuracy: 76.6931
Train Precision: 59.1195
No description has been provided for this image
F1-score: 58.8881
Recall: 81.0414
Validation Accuracy: 76.3907
Validation Precision: 58.7379
Training took 9.1910 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 32, 32, 8, 2]
No description has been provided for this image
F1-score: 56.1364
Recall: 81.6779
Train Accuracy: 60.4109
Train Precision: 55.5141
No description has been provided for this image
F1-score: 53.5228
Recall: 79.3258
Validation Accuracy: 59.2345
Validation Precision: 52.9661
Training took 8.3781 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 4, 8, 8, 32]
No description has been provided for this image
F1-score: 65.2121
Recall: 69.0702
Train Accuracy: 91.8626
Train Precision: 63.3197
No description has been provided for this image
F1-score: 64.1471
Recall: 68.3112
Validation Accuracy: 91.4553
Validation Precision: 62.2242
Training took 9.1031 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 16, 2, 32, 8]
No description has been provided for this image
F1-score: 66.4796
Recall: 86.5116
Train Accuracy: 79.3858
Train Precision: 66.0250
No description has been provided for this image
F1-score: 65.4102
Recall: 84.6428
Validation Accuracy: 78.8830
Validation Precision: 65.1817
Training took 8.6086 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 16, 2, 2]
No description has been provided for this image
F1-score: 51.1977
Recall: 62.7378
Train Accuracy: 76.2184
Train Precision: 49.9296
No description has been provided for this image
F1-score: 50.9182
Recall: 62.6080
Validation Accuracy: 76.0125
Validation Precision: 49.9394
Training took 6.9522 seconds to execute.
No description has been provided for this image
Neurons per layer: [16, 8]
No description has been provided for this image
F1-score: 76.7372
Recall: 87.4229
Train Accuracy: 91.6920
Train Precision: 72.7366
No description has been provided for this image
F1-score: 75.1355
Recall: 86.8281
Validation Accuracy: 91.2773
Validation Precision: 70.9620
Training took 8.9312 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 2, 8, 8]
No description has been provided for this image
F1-score: 50.6128
Recall: 61.3531
Train Accuracy: 72.3685
Train Precision: 53.0237
No description has been provided for this image
F1-score: 50.1866
Recall: 61.0356
Validation Accuracy: 71.7846
Validation Precision: 52.8635
Training took 8.1122 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 2, 32]
No description has been provided for this image
F1-score: 62.9501
Recall: 67.2120
Train Accuracy: 90.3123
Train Precision: 61.0855
No description has been provided for this image
F1-score: 62.2757
Recall: 66.9064
Validation Accuracy: 90.1202
Validation Precision: 60.4277
Training took 7.8664 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 4, 4]
No description has been provided for this image
F1-score: 49.0704
Recall: 74.8913
Train Accuracy: 48.3495
Train Precision: 57.0770
No description has been provided for this image
F1-score: 48.2464
Recall: 75.2545
Validation Accuracy: 47.4633
Validation Precision: 56.6805
Training took 8.4832 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 16, 2, 2, 2]
No description has been provided for this image
F1-score: 65.9643
Recall: 67.0550
Train Accuracy: 93.0050
Train Precision: 65.3047
No description has been provided for this image
F1-score: 64.8277
Recall: 66.2971
Validation Accuracy: 92.7014
Validation Precision: 63.9206
Training took 9.1975 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 16, 32, 4, 32]
No description has been provided for this image
F1-score: 67.1579
Recall: 88.9723
Train Accuracy: 80.8323
Train Precision: 67.0328
No description has been provided for this image
F1-score: 65.4252
Recall: 87.3459
Validation Accuracy: 79.3057
Validation Precision: 65.5463
Training took 8.6381 seconds to execute.
No description has been provided for this image
Neurons per layer: [2, 16, 32, 32]
No description has been provided for this image
F1-score: 63.5586
Recall: 86.4143
Train Accuracy: 75.6917
Train Precision: 62.3348
No description has been provided for this image
F1-score: 63.2081
Recall: 85.8273
Validation Accuracy: 75.6564
Validation Precision: 62.3921
Training took 6.7213 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 75.9491
Recall: 90.8476
Train Accuracy: 91.1208
Train Precision: 72.3236
No description has been provided for this image
F1-score: 74.4463
Recall: 90.3775
Validation Accuracy: 90.6542
Validation Precision: 70.8088
Training took 7.2196 seconds to execute.
No description has been provided for this image
Neurons per layer: [16, 16]
No description has been provided for this image
F1-score: 74.2703
Recall: 89.0593
Train Accuracy: 90.1343
Train Precision: 71.2061
No description has been provided for this image
F1-score: 72.6596
Recall: 89.1609
Validation Accuracy: 89.4971
Validation Precision: 69.4878
Training took 9.3211 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 16, 4, 16, 2]
No description has been provided for this image
F1-score: 43.1199
Recall: 48.6045
Train Accuracy: 82.6497
Train Precision: 41.1472
No description has been provided for this image
F1-score: 42.6916
Recall: 48.7538
Validation Accuracy: 82.1095
Validation Precision: 40.7426
Training took 8.2814 seconds to execute.
No description has been provided for this image
Neurons per layer: [8, 2, 8, 8]
No description has been provided for this image
F1-score: 62.1193
Recall: 84.1125
Train Accuracy: 77.3311
Train Precision: 62.0226
No description has been provided for this image
F1-score: 61.2490
Recall: 82.6301
Validation Accuracy: 76.9025
Validation Precision: 61.3539
Training took 6.8488 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 2]
No description has been provided for this image
F1-score: 39.9177
Recall: 52.4865
Train Accuracy: 46.7695
Train Precision: 50.5404
No description has been provided for this image
F1-score: 39.6581
Recall: 52.2724
Validation Accuracy: 45.9279
Validation Precision: 50.5744
Training took 8.7504 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 4, 8, 4]
No description has been provided for this image
F1-score: 64.6497
Recall: 67.4865
Train Accuracy: 90.9280
Train Precision: 62.4932
No description has been provided for this image
F1-score: 64.1858
Recall: 67.3890
Validation Accuracy: 91.2105
Validation Precision: 62.0087
Training took 14.4807 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 8, 8, 16]
No description has been provided for this image
F1-score: 70.1715
Recall: 89.3964
Train Accuracy: 85.0679
Train Precision: 64.8148
No description has been provided for this image
F1-score: 68.5475
Recall: 89.3312
Validation Accuracy: 84.2012
Validation Precision: 62.9774
Training took 14.3966 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 16, 4, 8, 8]
No description has been provided for this image
F1-score: 71.8650
Recall: 89.2345
Train Accuracy: 87.3971
Train Precision: 66.0923
No description has been provided for this image
F1-score: 69.9281
Recall: 87.8437
Validation Accuracy: 86.3818
Validation Precision: 64.1208
Training took 9.4049 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 4, 32, 2, 2]
No description has been provided for this image
F1-score: 21.4349
Recall: 25.0000
Train Accuracy: 75.0389
Train Precision: 18.7597
No description has been provided for this image
F1-score: 21.5893
Recall: 25.0000
Validation Accuracy: 75.9902
Validation Precision: 18.9976
Training took 8.7573 seconds to execute.
No description has been provided for this image
Neurons per layer: [4, 4, 8, 2]
No description has been provided for this image
F1-score: 40.1689
Recall: 53.6004
Train Accuracy: 46.5618
Train Precision: 51.4501
No description has been provided for this image
F1-score: 39.8067
Recall: 53.6892
Validation Accuracy: 45.6609
Validation Precision: 51.3594
In [ ]:
print("layer sizes, tr_acc, tr_pr, tr_f1, va_acc, va_pr, va_f1")
print("-------------")
for i in range(no_candidates):
  print(f"[{i}]{results[i]}")
layer sizes, tr_acc, tr_pr, tr_f1, va_acc, va_pr, va_f1
-------------
[0][[4, 2, 16], 84.28899933239374, 66.65046558704968, 68.33511285393146, 83.40008900756565, 65.1009010878891, 66.77159307038878]
[1][[4, 32, 16, 16, 2], 87.44158445219198, 58.0323986526375, 59.175416891936116, 86.58210947930573, 57.433013333065745, 58.443276492095606]
[2][[2, 8, 4, 16], 76.01068170017061, 56.87523970317332, 59.25677444951276, 75.10013351134846, 56.101037266145916, 57.848830760537126]
[3][[8, 4, 2], 89.57050663897337, 46.34730396127822, 46.33598094801782, 90.1646639964397, 46.575209643605874, 46.49563697220864]
[4][[32, 4, 2, 8, 8], 85.53519768563163, 69.9555334371407, 70.52719922101595, 85.02447708055185, 68.6328359638024, 69.34246952720822]
[5][[32, 16, 32], 91.58816111564424, 72.650104306391, 79.20891855841832, 90.36493101913663, 69.92965681946872, 76.66762786075975]
[6][[4, 4, 16], 88.49491877457162, 67.92848997909067, 72.00913454384602, 88.00623052959502, 66.50391802314093, 70.23344196869823]
[7][[32, 4], 89.65210295972109, 70.14384137525806, 73.53680834165834, 89.09657320872275, 68.54012372723578, 72.13335321711891]
[8][[4, 8, 8, 4], 76.69312365551517, 59.119507136175834, 59.5528506896984, 76.39074321317312, 58.73794763342279, 58.88809808953661]
[9][[8, 32, 32, 8, 2], 60.410948742674876, 55.51407786066004, 56.136380546117806, 59.23453493546952, 52.96613791812267, 53.522782869660226]
[10][[2, 4, 8, 8, 32], 91.86262146725021, 63.31974734575147, 65.21214979610744, 91.45527369826435, 62.22420054209174, 64.14711593689964]
[11][[8, 16, 2, 32, 8], 79.38580224018989, 66.02503093363772, 66.4795692847001, 78.88295505117935, 65.1817073285334, 65.41022066393323]
[12][[8, 16, 2, 2], 76.21838142571026, 49.929627971155114, 51.197671889435405, 76.01246105919003, 49.939370764534935, 50.918189944489136]
[13][[16, 8], 91.69201097841406, 72.73662557887211, 76.73718225632578, 91.27725856697819, 70.96201281706922, 75.13548543821209]
[14][[2, 2, 8, 8], 72.36851865588606, 53.02374001345599, 50.61275437797901, 71.78460169114375, 52.86354229015103, 50.18656215407813]
[15][[2, 2, 32], 90.31229137304354, 61.08549061381918, 62.95013503653726, 90.12016021361816, 60.42772664238274, 62.275670101843076]
[16][[8, 4, 4], 48.34952896669387, 57.077011186800554, 49.07044544712602, 47.46328437917223, 56.68051922258006, 48.246444093487]
[17][[8, 16, 2, 2, 2], 93.00496995771827, 65.30465964492123, 65.96430375289152, 92.70137961726746, 63.92059814749087, 64.82767714040156]
[18][[2, 16, 32, 4, 32], 80.83228247162674, 67.03283697218711, 67.15788254302285, 79.30574098798398, 65.54626981369272, 65.42517000722489]
[19][[2, 16, 32, 32], 75.69171426452044, 62.33481470933692, 63.55863744568408, 75.65643079661771, 62.39211834740743, 63.20811847585727]
[20][[32, 32], 91.12083673318003, 72.32359626665232, 75.94905425056719, 90.65420560747664, 70.8088375389383, 74.44633425298373]
[21][[16, 16], 90.1342630368667, 71.20613737784556, 74.27028852012047, 89.4971072541166, 69.48779190915792, 72.6595500188008]
[22][[32, 16, 4, 16, 2], 82.64965507009866, 41.147200976133874, 43.119901688010614, 82.10947930574099, 40.74262157615708, 42.69157088835595]
[23][[8, 2, 8, 8], 77.33105852681553, 62.022645528437316, 62.1193071514552, 76.90253671562083, 61.35390643389002, 61.248990760776614]
[24][[4, 2], 46.76952748312439, 50.54035811022786, 39.917660412611895, 45.92790387182911, 50.574434533160506, 39.65809942965147]
[25][[4, 4, 8, 4], 90.9279727023218, 62.49324077748489, 64.6497484461059, 91.21050289274588, 62.00868012041582, 64.18580093636021]
[26][[32, 8, 8, 16], 85.06787330316742, 64.81483594533418, 70.17145203353839, 84.20115709835336, 62.977357494781714, 68.54752705187767]
[27][[4, 16, 4, 8, 8], 87.39707736814776, 66.09230338031807, 71.86501508544423, 86.38184245660881, 64.12083447332142, 69.92807798219631]
[28][[4, 4, 32, 2, 2], 75.03894369853869, 18.759735924634672, 21.43492816883502, 75.99020916777927, 18.997552291944817, 21.589328612972565]
[29][[4, 4, 8, 2], 46.56182775758475, 51.4501005561882, 40.168891875606114, 45.66088117489986, 51.359416615072284, 39.806725041233925]
In [ ]:
# @title
#based on validation we pick the 3 best f1-score because of class imbalance 26, 15, 0
best = [5, 13, 20]
num_epochs = 50 #full test now
results = []

for j in best:
  performance = []
  layer_sizes = candidates[j]
  input_size = 15
  output_size = 4

  model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
  optimizer = optim.AdamW(model.parameters(), lr=0.0005)
  training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
  print("Neurons per layer:", layer_sizes)
  train_accuracy,train_prec, train_f1 = testing_model(model,train_loader,device)
  print(f'Train Accuracy: {train_accuracy:.4f}')
  print(f'Train Precision: {train_prec:.4f}')
  val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
  print(f'Validation Accuracy: {val_accuracy:.4f}')
  print(f'Validation Precision: {val_prec:.4f}')
  test_accuracy, test_prec, test_f1 = testing_model(model,test_loader,device)
  print(f'Test Accuracy: {test_accuracy:.4f}')
  print(f'Test Precision: {test_prec:.4f}')
  performance.append(layer_sizes)
  performance.append(train_accuracy)
  performance.append(train_prec)
  performance.append(train_f1)
  performance.append(val_accuracy)
  performance.append(val_prec)
  performance.append(val_f1)
  performance.append(test_accuracy)
  performance.append(test_prec)
  performance.append(test_f1)
  results.append(performance)
Epoch 20/50, Train Loss: 0.2077, Val Loss: 0.3078
Epoch 40/50, Train Loss: 0.1827, Val Loss: 0.3079
Training took 27.1269 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 16, 32]
No description has been provided for this image
F1-score: 75.5780
Recall: 94.4980
Train Accuracy: 90.4384
Train Precision: 71.3393
No description has been provided for this image
F1-score: 73.2945
Recall: 93.0178
Validation Accuracy: 89.1411
Validation Precision: 69.4821
No description has been provided for this image
F1-score: 75.3286
Recall: 92.7704
Test Accuracy: 89.8309
Test Precision: 71.2867
Epoch 20/50, Train Loss: 0.3036, Val Loss: 0.3556
Epoch 40/50, Train Loss: 0.2322, Val Loss: 0.2927
Training took 26.1509 seconds to execute.
No description has been provided for this image
Neurons per layer: [16, 8]
No description has been provided for this image
F1-score: 76.4249
Recall: 92.2196
Train Accuracy: 90.8093
Train Precision: 70.8101
No description has been provided for this image
F1-score: 73.7647
Recall: 91.2976
Validation Accuracy: 89.2746
Validation Precision: 67.8741
No description has been provided for this image
F1-score: 75.8317
Recall: 90.0661
Test Accuracy: 89.9421
Test Precision: 70.0714
Epoch 20/50, Train Loss: 0.2295, Val Loss: 0.2957
Epoch 40/50, Train Loss: 0.1928, Val Loss: 0.2967
Training took 28.0311 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 80.2987
Recall: 93.2662
Train Accuracy: 92.2706
Train Precision: 73.9669
No description has been provided for this image
F1-score: 77.4049
Recall: 92.4097
Validation Accuracy: 90.9212
Validation Precision: 70.8381
No description has been provided for this image
F1-score: 79.3463
Recall: 91.1169
Test Accuracy: 91.3663
Test Precision: 73.2299
In [ ]:
#we pick the best test set F1-score because we care about overall classification performance
print("layer sizes, tr_acc, tr_pr, tr_f1, va_acc, va_pr, va_f1, te_acc, te_pr, te_f1")
print("-------------")
for i in range(len(best)):
  print(f"[{i}]{results[i]}")
layer sizes, tr_acc, tr_pr, tr_f1, va_acc, va_pr, va_f1, te_acc, te_pr, te_f1
-------------
[0][[32, 16, 32], 90.43839477783547, 71.33932674711339, 75.57802031140801, 89.14107699154428, 69.48207262184495, 73.29453588601848, 89.83088562527814, 71.28669491798293, 75.32864435649812]
[1][[16, 8], 90.80928714487055, 70.81012648142351, 76.42488929824995, 89.2745883400089, 67.87406087103403, 73.76474705980804, 89.942145082332, 70.07138901317154, 75.83174366958278]
[2][[32, 32], 92.2706030709888, 73.96690250788728, 80.29874470011178, 90.92122830440587, 70.8380646590243, 77.40487743855444, 91.36626613262128, 73.2299258368904, 79.34625540267469]
In [ ]:
layer_sizes = [32, 32]
num_epochs = 50

The impact of batch size

In [ ]:
#Batch size: 1
best_batch = None
best_f1_batch = 0

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
if val_f1 > best_f1_batch:
  best_f1_batch = val_f1
  best_batch = 1
Epoch 20/50, Train Loss: 0.1243, Val Loss: 0.1434
Epoch 40/50, Train Loss: 0.1198, Val Loss: 0.1386
Training took 1206.8615 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 82.9231
Recall: 89.9423
Train Accuracy: 94.1770
Train Precision: 78.0547
No description has been provided for this image
F1-score: 80.9851
Recall: 88.4402
Validation Accuracy: 93.6582
Validation Precision: 75.9873
No description has been provided for this image
F1-score: 81.1147
Recall: 87.5018
Test Accuracy: 93.4134
Test Precision: 76.5866
In [ ]:
#Batch size: 32
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
if val_f1 > best_f1_batch:
  best_f1_batch = val_f1
  best_batch = 32
Epoch 20/50, Train Loss: 0.2100, Val Loss: 0.2651
Epoch 40/50, Train Loss: 0.1827, Val Loss: 0.2559
Training took 44.7575 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 80.8769
Recall: 93.3162
Train Accuracy: 92.4635
Train Precision: 74.4612
No description has been provided for this image
F1-score: 78.3417
Recall: 92.4637
Validation Accuracy: 91.3218
Validation Precision: 71.6979
No description has been provided for this image
F1-score: 80.3138
Recall: 91.1762
Test Accuracy: 91.7668
Test Precision: 74.1637
In [ ]:
#Batch size: 64
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
if val_f1 > best_f1_batch:
  best_f1_batch = val_f1
  best_batch = 64
Epoch 20/50, Train Loss: 0.2339, Val Loss: 0.3139
Epoch 40/50, Train Loss: 0.1981, Val Loss: 0.3177
Training took 26.3490 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 76.1982
Recall: 93.1362
Train Accuracy: 90.6090
Train Precision: 70.8453
No description has been provided for this image
F1-score: 73.3021
Recall: 91.5501
Validation Accuracy: 89.0966
Validation Precision: 67.9728
No description has been provided for this image
F1-score: 75.6354
Recall: 90.6304
Test Accuracy: 89.9866
Test Precision: 70.4150
In [ ]:
#Batch size: 128
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
if val_f1 > best_f1_batch:
  best_f1_batch = val_f1
  best_batch = 128
Epoch 20/50, Train Loss: 0.2526, Val Loss: 0.3181
Epoch 40/50, Train Loss: 0.2129, Val Loss: 0.3014
Training took 15.6201 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 74.9358
Recall: 93.4147
Train Accuracy: 90.0823
Train Precision: 70.9052
No description has been provided for this image
F1-score: 72.7672
Recall: 92.1156
Validation Accuracy: 88.9408
Validation Precision: 68.7623
No description has been provided for this image
F1-score: 74.6601
Recall: 91.4239
Test Accuracy: 89.4749
Test Precision: 70.5440
In [ ]:
#Batch size: 512
train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

model = DeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
if val_f1 > best_f1_batch:
  best_f1_batch = val_f1
  best_batch = 512
Epoch 20/50, Train Loss: 0.4744, Val Loss: 0.4976
Epoch 40/50, Train Loss: 0.2895, Val Loss: 0.3617
Training took 13.7247 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 73.9633
Recall: 90.4660
Train Accuracy: 89.7708
Train Precision: 71.2887
No description has been provided for this image
F1-score: 72.3576
Recall: 89.8968
Validation Accuracy: 89.0966
Validation Precision: 69.7094
No description has been provided for this image
F1-score: 74.1800
Recall: 88.6615
Test Accuracy: 89.6084
Test Precision: 71.3528
In [ ]:
best_batch = 32

The best batch is 32

The impact of the Activation Function

In [ ]:
#Class with only Sigmoid
class sigmoidDeepFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size):
        super(sigmoidDeepFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))
        layers.append(nn.Sigmoid())

        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
            layers.append(nn.Sigmoid())


        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)

#Class with Sigmoid and Relu
class sigmoidReluDeepFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size):
        super(sigmoidReluDeepFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))
        layers.append(nn.Sigmoid())

        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
            if i % 2 == 0:
              layers.append(nn.Sigmoid())
            else:
              layers.append(nn.ReLU())  # "ReLU" con la "U" maiuscola!

        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)

#Class with only Linear
class linearDeepFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size):
        super(linearDeepFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))

        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))

        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)

#Class with Relu + sigmoid at the end
class endsigmoidDeepFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size):
        super(endsigmoidDeepFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))


        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.ReLU())
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))


        layers.append(nn.Sigmoid())
        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)
In [ ]:
train_loader = DataLoader(train_dataset, batch_size=best_batch, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_batch, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_batch, shuffle=False)
In [ ]:
#only Sigmoid

model = sigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.4006, Val Loss: 0.4080
Epoch 40/50, Train Loss: 0.2965, Val Loss: 0.3216
Training took 42.1721 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 73.4579
Recall: 90.2971
Train Accuracy: 88.2427
Train Precision: 68.1216
No description has been provided for this image
F1-score: 71.3386
Recall: 89.8056
Validation Accuracy: 86.9826
Validation Precision: 65.7588
No description has been provided for this image
F1-score: 73.4595
Recall: 88.6506
Test Accuracy: 87.7837
Test Precision: 67.9667
In [ ]:
#Sigmoid and Relu

model = sigmoidReluDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.3319, Val Loss: 0.3721
Epoch 40/50, Train Loss: 0.2416, Val Loss: 0.3071
Training took 44.4394 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 75.1006
Recall: 91.4047
Train Accuracy: 90.0007
Train Precision: 70.2395
No description has been provided for this image
F1-score: 73.5588
Recall: 91.1228
Validation Accuracy: 89.4526
Validation Precision: 68.4915
No description has been provided for this image
F1-score: 74.8594
Recall: 89.5014
Test Accuracy: 89.5194
Test Precision: 69.8897
In [ ]:
#only Linear

model = linearDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.4579, Val Loss: 0.4488
Epoch 40/50, Train Loss: 0.4278, Val Loss: 0.4242
Training took 40.6213 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 64.0689
Recall: 88.6860
Train Accuracy: 81.6112
Train Precision: 62.1008
No description has been provided for this image
F1-score: 62.5187
Recall: 88.2675
Validation Accuracy: 80.3738
Validation Precision: 60.9446
No description has been provided for this image
F1-score: 64.8231
Recall: 88.0633
Test Accuracy: 81.4642
Test Precision: 62.5538
In [ ]:
#Relu + sigmoid at the end

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.2357, Val Loss: 0.2708
Epoch 40/50, Train Loss: 0.1934, Val Loss: 0.2337
Training took 42.6191 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 80.9290
Recall: 93.2403
Train Accuracy: 92.3225
Train Precision: 74.5089
No description has been provided for this image
F1-score: 77.9809
Recall: 92.3172
Validation Accuracy: 90.8767
Validation Precision: 71.1916
No description has been provided for this image
F1-score: 79.7511
Recall: 91.0256
Test Accuracy: 91.3218
Test Precision: 73.5174

The impact of the Optimizer

We take the best combintion of activation function, that in our case is the one with all relu and at the end the sigmoid

In [ ]:
input_size = 15
output_size = 4
In [ ]:
#AdamW

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.1803, Val Loss: 0.1833
Epoch 40/50, Train Loss: 0.1423, Val Loss: 0.1492
Training took 23.2073 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 80.8995
Recall: 80.0247
Train Accuracy: 95.2377
Train Precision: 82.4567
No description has been provided for this image
F1-score: 78.3042
Recall: 78.1364
Validation Accuracy: 94.8376
Validation Precision: 78.8984
No description has been provided for this image
F1-score: 78.2730
Recall: 77.6642
Test Accuracy: 94.8376
Test Precision: 79.5608
In [ ]:
#SGD

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.7382, Val Loss: 0.7199
Epoch 40/50, Train Loss: 0.6945, Val Loss: 0.6771
Training took 22.6219 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 21.4349
Recall: 25.0000
Train Accuracy: 75.0389
Train Precision: 18.7597
No description has been provided for this image
F1-score: 21.5893
Recall: 25.0000
Validation Accuracy: 75.9902
Validation Precision: 18.9976
No description has been provided for this image
F1-score: 21.3831
Recall: 25.0000
Test Accuracy: 74.7219
Test Precision: 18.6805
In [ ]:
#SGD with momentum 0.1

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.1)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.7434, Val Loss: 0.7251
Epoch 40/50, Train Loss: 0.6940, Val Loss: 0.6768
Training took 21.6938 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 21.4349
Recall: 25.0000
Train Accuracy: 75.0389
Train Precision: 18.7597
No description has been provided for this image
F1-score: 21.5893
Recall: 25.0000
Validation Accuracy: 75.9902
Validation Precision: 18.9976
No description has been provided for this image
F1-score: 21.3831
Recall: 25.0000
Test Accuracy: 74.7219
Test Precision: 18.6805
In [ ]:
#SGD with momentum 0.5

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.5)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.6846, Val Loss: 0.6664
Epoch 40/50, Train Loss: 0.5473, Val Loss: 0.5319
Training took 21.6582 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 45.2502
Recall: 44.7289
Train Accuracy: 88.6804
Train Precision: 46.5695
No description has been provided for this image
F1-score: 45.2641
Recall: 44.6702
Validation Accuracy: 89.0521
Validation Precision: 46.6302
No description has been provided for this image
F1-score: 45.1669
Recall: 44.7483
Test Accuracy: 88.2065
Test Precision: 46.3775
In [ ]:
#SGD with momentum 0.9

model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.9)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.4403, Val Loss: 0.4245
Epoch 40/50, Train Loss: 0.3795, Val Loss: 0.3706
Training took 21.2807 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 46.2999
Recall: 46.3874
Train Accuracy: 89.6966
Train Precision: 46.5919
No description has been provided for this image
F1-score: 46.3804
Recall: 46.3955
Validation Accuracy: 90.1202
Validation Precision: 46.7286
No description has been provided for this image
F1-score: 46.4231
Recall: 46.6093
Test Accuracy: 89.4526
Test Precision: 46.6214

Best optimizer AdamW

In [ ]:
#adamw
learning_rates = [0.0001, 0.0005, 0.001, 0.005]
layer_sizes = [32, 32]
num_epochs = 50
In [ ]:
#AdamW
for _ in learning_rates:
  print(f"Learning rate: {_}")
  model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
  optimizer = optim.AdamW(model.parameters(), lr=0.0005)
  training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
  train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
  print(f'Train Accuracy: {train_accuracy:.4f}')
  print(f'Train Precision: {train_prec:.4f}')
  val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
  print(f'Validation Accuracy: {val_accuracy:.4f}')
  print(f'Validation Precision: {val_prec:.4f}')
  test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
  print(f'Test Accuracy: {test_accuracy:.4f}')
  print(f'Test Precision: {test_prec:.4f}')
Learning rate: 0.0001
Epoch 20/50, Train Loss: 0.1779, Val Loss: 0.1828
Epoch 40/50, Train Loss: 0.1338, Val Loss: 0.1473
Training took 23.7452 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 76.7444
Recall: 75.7861
Train Accuracy: 95.2303
Train Precision: 80.2910
No description has been provided for this image
F1-score: 76.2450
Recall: 75.5402
Validation Accuracy: 94.8376
Validation Precision: 78.9167
No description has been provided for this image
F1-score: 75.9581
Recall: 75.2393
Test Accuracy: 94.6150
Test Precision: 78.1679
Learning rate: 0.0005
Epoch 20/50, Train Loss: 0.1786, Val Loss: 0.1820
Epoch 40/50, Train Loss: 0.1372, Val Loss: 0.1477
Training took 23.7823 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 78.1567
Recall: 76.6993
Train Accuracy: 95.1413
Train Precision: 81.2622
No description has been provided for this image
F1-score: 77.8981
Recall: 77.0956
Validation Accuracy: 94.7708
Validation Precision: 79.5801
No description has been provided for this image
F1-score: 77.7155
Recall: 76.7954
Test Accuracy: 94.7931
Test Precision: 79.5463
Learning rate: 0.001
Epoch 20/50, Train Loss: 0.1839, Val Loss: 0.1871
Epoch 40/50, Train Loss: 0.1464, Val Loss: 0.1571
Training took 23.7132 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 79.1418
Recall: 77.6695
Train Accuracy: 95.2600
Train Precision: 82.3645
No description has been provided for this image
F1-score: 78.6309
Recall: 77.8649
Validation Accuracy: 94.8821
Validation Precision: 80.3597
No description has been provided for this image
F1-score: 77.7968
Recall: 77.0169
Test Accuracy: 94.8598
Test Precision: 79.5421
Learning rate: 0.005
Epoch 20/50, Train Loss: 0.1814, Val Loss: 0.1837
Epoch 40/50, Train Loss: 0.1440, Val Loss: 0.1502
Training took 23.7207 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 79.5081
Recall: 77.9007
Train Accuracy: 95.2155
Train Precision: 82.7242
No description has been provided for this image
F1-score: 78.7664
Recall: 78.0514
Validation Accuracy: 94.8153
Validation Precision: 80.2109
No description has been provided for this image
F1-score: 77.7527
Recall: 76.8029
Test Accuracy: 94.8153
Test Precision: 79.6063
In [ ]:
#adamW lr=0.0005 but with 30 epochs
num_epochs = 30
In [ ]:
model = endsigmoidDeepFFNN(input_size, layer_sizes, output_size).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
print("Neurons per layer:", layer_sizes)
train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/30, Train Loss: 0.1819, Val Loss: 0.1871
Training took 14.5832 seconds to execute.
No description has been provided for this image
Neurons per layer: [32, 32]
No description has been provided for this image
F1-score: 68.2954
Recall: 68.6538
Train Accuracy: 94.2660
Train Precision: 68.3192
No description has been provided for this image
F1-score: 67.7705
Recall: 68.2604
Validation Accuracy: 94.1923
Validation Precision: 67.7304
No description has been provided for this image
F1-score: 68.1478
Recall: 68.6566
Test Accuracy: 93.9697
Test Precision: 68.0386

Task 5: Overfitting and Regularization¶

Baseline FFNN

In [ ]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
In [ ]:
class LastModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LastModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Linear(16, output_size)  # No activation on output layer
        )

    def forward(self, x):
        return self.model(x)
In [ ]:
criterion = nn.CrossEntropyLoss()
epochs = 50

model = LastModel(input_size=15,  output_size=4).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, epochs, criterion)

train_accuracy,train_prec, _ = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, _ = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, _ = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.1225, Val Loss: 0.1408
Epoch 40/50, Train Loss: 0.0902, Val Loss: 0.1057
Training took 20.9864 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 86.4885
Recall: 87.5240
Train Accuracy: 96.7732
Train Precision: 85.8489
No description has been provided for this image
F1-score: 85.9639
Recall: 88.1143
Validation Accuracy: 96.5287
Validation Precision: 84.9157
No description has been provided for this image
F1-score: 86.4583
Recall: 87.8267
Test Accuracy: 96.4619
Test Precision: 85.5880

adding dropout

In [ ]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
In [ ]:
class LastFFNN(nn.Module):
    def __init__(self, input_size, layer_sizes, output_size, use_batchnorm, dropout_rate):
        super(LastFFNN, self).__init__()
        layers = []

        # first hidden layer
        layers.append(nn.Linear(input_size, layer_sizes[0]))
        layers.append(nn.ReLU())

        # Hidden layers added
        for i in range(1, len(layer_sizes)):
            layers.append(nn.Linear(layer_sizes[i - 1], layer_sizes[i]))
            if use_batchnorm:
              layers.append(nn.BatchNorm1d(layer_sizes[i]))
            layers.append(nn.ReLU())
            if dropout_rate > 0.0:
              layers.append(nn.Dropout(dropout_rate))



        layers.append(nn.Linear(layer_sizes[-1], output_size)) #for the last layer we don't use a relu as activation function, since every output <0 will be seen as a 0 with a relu, so it's better to use the automatic softmax applied by pytorch

        self.model = nn.Sequential(*layers) #Create an ordered sequence of layers that are applied one after another to the input data.
                                            # model becomes the whole neural network

    def forward(self, x):
        return self.model(x)
In [ ]:
input_size = 15
layer_sizes = [256, 128, 64, 32, 16]
dropout_possible = [0.2, 0.3, 0.5]
use_batchnorm = False
output_size = 4
In [ ]:
results = []
for i in range(len(dropout_possible)):
  performance = []

  model = LastFFNN(input_size, layer_sizes, output_size, use_batchnorm, dropout_possible[i]).to(device)
  optimizer = optim.AdamW(model.parameters(), lr=0.0005)
  training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
  print("Dropout", dropout_possible[i])
  train_accuracy,train_prec, train_f1 = testing_model(model,train_loader,device)
  print(f'Train Accuracy: {train_accuracy:.4f}')
  print(f'Train Precision: {train_prec:.4f}')
  val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
  print(f'Validation Accuracy: {val_accuracy:.4f}')
  print(f'Validation Precision: {val_prec:.4f}')
  test_accuracy, test_prec, test_f1 = testing_model(model,test_loader,device)
  print(f'Test Accuracy: {test_accuracy:.4f}')
  print(f'Test Precision: {test_prec:.4f}')
  performance.append(dropout_possible[i])
  performance.append(train_accuracy)
  performance.append(train_prec)
  performance.append(train_f1)
  performance.append(val_accuracy)
  performance.append(val_prec)
  performance.append(val_f1)
  performance.append(test_accuracy)
  performance.append(test_prec)
  performance.append(test_f1)
  results.append(performance)
Epoch 20/50, Train Loss: 0.1324, Val Loss: 0.1371
Epoch 40/50, Train Loss: 0.1109, Val Loss: 0.1219
Training took 20.5519 seconds to execute.
No description has been provided for this image
Dropout 0.2
No description has been provided for this image
F1-score: 82.5190
Recall: 81.2979
Train Accuracy: 96.5581
Train Precision: 83.9549
No description has been provided for this image
F1-score: 82.0983
Recall: 81.4460
Validation Accuracy: 96.3284
Validation Precision: 82.8831
No description has been provided for this image
F1-score: 79.9556
Recall: 78.6331
Test Accuracy: 96.0392
Test Precision: 81.5845
Epoch 20/50, Train Loss: 0.1480, Val Loss: 0.1557
Epoch 40/50, Train Loss: 0.1182, Val Loss: 0.1245
Training took 19.8149 seconds to execute.
No description has been provided for this image
Dropout 0.3
No description has been provided for this image
F1-score: 71.6789
Recall: 71.6591
Train Accuracy: 96.3356
Train Precision: 71.7565
No description has been provided for this image
F1-score: 71.1983
Recall: 70.8150
Validation Accuracy: 96.1504
Validation Precision: 71.6341
No description has been provided for this image
F1-score: 71.1457
Recall: 71.3748
Test Accuracy: 95.8389
Test Precision: 70.9638
Epoch 20/50, Train Loss: 0.2030, Val Loss: 0.1800
Epoch 40/50, Train Loss: 0.1566, Val Loss: 0.1367
Training took 20.5195 seconds to execute.
No description has been provided for this image
Dropout 0.5
No description has been provided for this image
F1-score: 69.7874
Recall: 71.2956
Train Accuracy: 95.3119
Train Precision: 68.6465
No description has been provided for this image
F1-score: 68.8505
Recall: 70.4051
Validation Accuracy: 94.9043
Validation Precision: 67.6798
No description has been provided for this image
F1-score: 69.4596
Recall: 71.0621
Test Accuracy: 94.9043
Test Precision: 68.1992
In [ ]:
for i in range(len(dropout_possible)):
  print(f"[{i}]{results[i]}")
[0][0.2, 96.55811883391439, 83.95488100127295, 82.51902098141079, 96.32843791722297, 82.8831035103639, 82.09829092725245, 96.03916332888296, 81.58450875601362, 79.9556022369935]
[1][0.3, 96.33558341369334, 71.75648807821649, 71.67889474630958, 96.15042278593681, 71.63407408905198, 71.19833693245742, 95.83889630618603, 70.96382538470898, 71.1456617243273]
[2][0.5, 95.3119204806765, 68.64647406938934, 69.7874422511396, 94.9043168669337, 67.67979122014928, 68.85046535142463, 94.9043168669337, 68.1992319607892, 69.45956502739949]

apply batch norm only

In [ ]:
input_size = 15
layer_sizes = [256, 128, 64, 32, 16]
dropout_possible = 0.0
use_batchnorm = True
output_size = 4
In [ ]:
model = LastFFNN(input_size, layer_sizes, output_size, use_batchnorm, dropout_possible).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.0005)
training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
train_accuracy,train_prec, train_f1 = testing_model(model,train_loader,device)
print(f'Train Accuracy: {train_accuracy:.4f}')
print(f'Train Precision: {train_prec:.4f}')
val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(f'Validation Precision: {val_prec:.4f}')
test_accuracy, test_prec, test_f1 = testing_model(model,test_loader,device)
print(f'Test Accuracy: {test_accuracy:.4f}')
print(f'Test Precision: {test_prec:.4f}')
Epoch 20/50, Train Loss: 0.1099, Val Loss: 0.1650
Epoch 40/50, Train Loss: 0.0877, Val Loss: 0.1177
Training took 23.2273 seconds to execute.
No description has been provided for this image
No description has been provided for this image
F1-score: 71.1591
Recall: 71.5148
Train Accuracy: 95.6828
Train Precision: 70.8080
No description has been provided for this image
F1-score: 70.5642
Recall: 70.7644
Validation Accuracy: 95.3049
Validation Precision: 70.3759
No description has been provided for this image
F1-score: 70.5715
Recall: 71.1028
Test Accuracy: 95.1268
Test Precision: 70.0520

apply weight decay only

In [ ]:
input_size = 15
layer_sizes = [256, 128, 64, 32, 16]
dropout_possible = 0.0
use_batchnorm = False
output_size = 4
weight_decay_possible = [1e-4, 1e-3, 1e-2]
In [ ]:
results = []
for i in range(len(weight_decay_possible)):
  performance = []

  model = LastFFNN(input_size, layer_sizes, output_size, use_batchnorm, dropout_possible).to(device)
  optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=weight_decay_possible[i])
  training_loop(model, train_loader, val_loader, train_dataset, val_dataset, device, optimizer, num_epochs, criterion)
  print("Weight decay", weight_decay_possible[i])
  train_accuracy,train_prec, train_f1 = testing_model(model,train_loader,device)
  print(f'Train Accuracy: {train_accuracy:.4f}')
  print(f'Train Precision: {train_prec:.4f}')
  val_accuracy, val_prec, val_f1 = testing_model(model,val_loader,device)
  print(f'Validation Accuracy: {val_accuracy:.4f}')
  print(f'Validation Precision: {val_prec:.4f}')
  test_accuracy, test_prec, test_f1 = testing_model(model,test_loader,device)
  print(f'Test Accuracy: {test_accuracy:.4f}')
  print(f'Test Precision: {test_prec:.4f}')
  performance.append(weight_decay_possible[i])
  performance.append(train_accuracy)
  performance.append(train_prec)
  performance.append(train_f1)
  performance.append(val_accuracy)
  performance.append(val_prec)
  performance.append(val_f1)
  performance.append(test_accuracy)
  performance.append(test_prec)
  performance.append(test_f1)
  results.append(performance)
Epoch 20/50, Train Loss: 0.1073, Val Loss: 0.1220
Epoch 40/50, Train Loss: 0.0883, Val Loss: 0.1066
Training took 23.4895 seconds to execute.
No description has been provided for this image
Weight decay 0.0001
No description has been provided for this image
F1-score: 85.6490
Recall: 82.4453
Train Accuracy: 96.6768
Train Precision: 92.5449
No description has been provided for this image
F1-score: 83.5200
Recall: 80.2392
Validation Accuracy: 96.2617
Validation Precision: 90.7865
No description has been provided for this image
F1-score: 82.0474
Recall: 79.1804
Test Accuracy: 96.1504
Test Precision: 91.3785
Epoch 20/50, Train Loss: 0.1140, Val Loss: 0.1352
Epoch 40/50, Train Loss: 0.0917, Val Loss: 0.1041
Training took 18.9354 seconds to execute.
No description has been provided for this image
Weight decay 0.001
No description has been provided for this image
F1-score: 87.7231
Recall: 91.1976
Train Accuracy: 96.7213
Train Precision: 85.5104
No description has been provided for this image
F1-score: 86.7585
Recall: 91.1657
Validation Accuracy: 96.6177
Validation Precision: 84.6206
No description has been provided for this image
F1-score: 87.0739
Recall: 90.1371
Test Accuracy: 96.3062
Test Precision: 85.0356
Epoch 20/50, Train Loss: 0.1119, Val Loss: 0.1241
Epoch 40/50, Train Loss: 0.0925, Val Loss: 0.1087
Training took 19.7830 seconds to execute.
No description has been provided for this image
Weight decay 0.01
No description has been provided for this image
F1-score: 88.1428
Recall: 91.5920
Train Accuracy: 96.7807
Train Precision: 85.9251
No description has been provided for this image
F1-score: 86.8449
Recall: 90.2673
Validation Accuracy: 96.5287
Validation Precision: 84.9325
No description has been provided for this image
F1-score: 85.9276
Recall: 88.2490
Test Accuracy: 96.1504
Test Precision: 84.2919
In [ ]:
for i in range(len(weight_decay_possible)):
  print(f"[{i}]{results[i]}")
[0][0.0001, 96.67680439136562, 92.54494633627286, 85.64895889134517, 96.26168224299066, 90.78654571426563, 83.52003833376685, 96.15042278593681, 91.37854530018177, 82.0474001681567]
[1][0.001, 96.72131147540983, 85.51035851447982, 87.7231465764851, 96.61771250556296, 84.62063024741808, 86.75846745482573, 96.3061860258122, 85.035620910736, 87.07386413204456]
[2][0.01, 96.78065425413544, 85.92513709600169, 88.14279610565526, 96.52870493991989, 84.93246213267615, 86.84486426068861, 96.15042278593681, 84.29191102871214, 85.9276418974767]